library(tidyverse)
library(ggplot2)
library(wordcloud)
library("leaflet")
# Load the crime incident export and print its column structure.
# NOTE(review): absolute Windows path - the script only runs on a machine
# where this exact file exists.
crime_boston <- read.csv(file = "E:\\ND\\module1\\crime.csv")
str(crime_boston)

# Number of incidents per (UCR part, offense group) pair, largest first.
crime_rank <- crime_boston %>%
  group_by(UCR_PART, OFFENSE_CODE_GROUP) %>%
  summarise(count = n()) %>%
  filter(count > 0) %>%
  arrange(desc(count))
# Base leaflet widget centred at lng -71.0589 / lat 42.3601, zoom level 12.
# It is reused by the maps further down.
map_boston <- setView(leaflet(), lng = -71.0589, lat = 42.3601, zoom = 12)

# All incidents as clustered markers on the default tile layer.
# No lng/lat arguments are passed, so leaflet picks the coordinate columns
# itself and reports:
## Assuming "Long" and "Lat" are longitude and latitude, respectively
map_boston %>%
  addTiles() %>%
  addMarkers(
    data = crime_boston,
    clusterOptions = markerClusterOptions()
  )
# Row subsets for the three offense groups plotted below.
Arson <- crime_boston[
  crime_boston[["OFFENSE_CODE_GROUP"]] == "Arson",
]
Homicide <- crime_boston[
  crime_boston[["OFFENSE_CODE_GROUP"]] == "Homicide",
]
Embezzlement <- crime_boston[
  crime_boston[["OFFENSE_CODE_GROUP"]] == "Embezzlement",
]

# Layered provider tiles (base map, semi-transparent line overlay, labels)
# with one circle-marker layer per offense group, each in its own colour:
# Homicide "#FF6347", Embezzlement "#2E8B57", Arson "#4169E1".
map_boston %>%
  addProviderTiles(providers$MtbMap) %>%
  addProviderTiles(
    providers$Stamen.TonerLines,
    options = providerTileOptions(opacity = 0.35)
  ) %>%
  addProviderTiles(providers$Stamen.TonerLabels) %>%
  addCircleMarkers(
    data = Homicide, lng = ~Long, lat = ~Lat,
    radius = 4, color = "#FF6347", opacity = 0.7
  ) %>%
  addCircleMarkers(
    data = Embezzlement, lng = ~Long, lat = ~Lat,
    radius = 4, color = "#2E8B57", opacity = 0.7
  ) %>%
  addCircleMarkers(
    data = Arson, lng = ~Long, lat = ~Lat,
    radius = 4, color = "#4169E1", opacity = 0.7
  )
# Nine-step "Blues" palette. It is not referenced by the word cloud below or
# anywhere else in the visible script; kept in case later code relies on it.
pal <- brewer.pal(9, "Blues")

# Frequency table of street names: one row per distinct STREET value.
street_name <- as.data.frame(table(crime_boston$STREET))
colnames(street_name) <- c("Street_Name", "Count")

# Word cloud of streets with at least 200 incidents. Words are placed in
# decreasing frequency order and coloured by frequency rather than randomly.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
wordcloud(
  words = street_name$Street_Name,
  freq = street_name$Count,
  min.freq = 200,
  random.order = FALSE,
  random.color = FALSE,
  colors = c("black", "cornflowerblue", "darkred"),
  scale = c(2, .3)
)
# Bar chart of incident totals per month, with months 7 and 8 highlighted.
#
# The highlight flag is computed from MONTH inside the summarised data rather
# than taken from a free-standing 12-element vector, so it stays attached to
# the right bars even if a month is missing or the rows arrive in another
# order. Non-highlighted bars keep an NA fill (ggplot2's NA colour), and the
# highlighted ones share the single "withcolor" level, as before.
# assumes MONTH holds the calendar month number (1-12) - TODO confirm
crime_boston %>%
  group_by(MONTH) %>%
  summarize(number = n()) %>%
  mutate(
    area_color = ifelse(MONTH %in% c(7, 8), "withcolor", NA_character_)
  ) %>%
  ggplot() +
  geom_col(aes(x = as.factor(MONTH), y = number, fill = area_color)) +
  theme(legend.position = "none") +
  xlab("Month") +
  ylab("Crime Count") +
  ggtitle("Crime Total By Month") +
  theme(plot.title = element_text(size = 18, face = "bold.italic"))
# Break the day into four 6-hour periods and attach a shift name to each.
crime_boston1 <- crime_boston

# Bin edges in hours. Numeric, since cut() works on numeric breaks.
time_diff <- c(0, 6, 12, 18, 24)

# right = FALSE makes every bin left-closed: [0,6), [6,12), [12,18), [18,24].
# With cut()'s default (right-closed bins) hour 6 was filed under "00-06",
# hour 12 under "06-12" and hour 18 under "12-18", contradicting the labels.
# include.lowest = TRUE closes the last bin on the right so a value of 24
# would still be binned.
crime_boston1$time_diff <- cut(
  crime_boston1$HOUR,
  breaks = time_diff,
  labels = c("00-06", "06-12", "12-18", "18-24"),
  right = FALSE,
  include.lowest = TRUE
)
table(crime_boston1$time_diff)
## NOTE: the counts recorded here earlier
## (00-06 48402, 06-12 88107, 12-18 111493, 18-24 71071)
## came from the right-closed bins and are no longer current; re-run to refresh.

# Shift name for each period; rows without a period (NA) stay NA.
crime_boston1 <- crime_boston1 %>%
  mutate(shift = case_when(
    time_diff == "00-06" ~ "Late Night",
    time_diff == "06-12" ~ "Day",
    time_diff == "12-18" ~ "Afternoon",
    time_diff == "18-24" ~ "Evening"
  ))
table(crime_boston1$shift)
## NOTE: the shift counts recorded here earlier
## (Afternoon 111493, Day 88107, Evening 71071, Late Night 48402)
## were based on the old bins as well; re-run to refresh.
# Bar chart of incident totals per time-of-day period. Each bar is filled by
# its own count (one palette step per bar) and carries the count as a label
# just below its top edge.
crime_boston1 %>%
  group_by(time_diff) %>%
  summarise(number = n()) %>%
  ggplot(mapping = aes(x = time_diff, y = number)) +
  geom_col(mapping = aes(fill = as.factor(number))) +
  geom_text(
    mapping = aes(label = number),
    vjust = 1.6,
    color = "black",
    position = position_dodge(0.9),
    size = 3.5
  ) +
  scale_fill_brewer(palette = "YlOrBr") +
  scale_y_continuous(limits = c(0, 120000)) +
  labs(
    x = "Time of the Day",
    y = "Crime Count",
    title = "Crime Total By Time"
  ) +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 18, face = "bold.italic")
  )
# Part One: Aggravated Assault and Homicide are classified as violent crimes, while Auto Theft, Commercial Burglary, Larceny, and Robbery are classified as property crimes.
# Part Two: Drug Violation, Simple Assault, Vandalism, Fraud, Harassment, Disorderly Conduct, Violations, Recovered Stolen Property, Counterfeiting, Ballistics, Operating Under the Influence, Offenses Against Child / Family, Evading Fare, Embezzlement, Prisoner Related Incidents, Prostitution, Criminal Harassment, Fire Related Reports, Bomb Hoax, Phone Call Complaints, Explosives, Gambling, Biological Threat.
# Part Three: Motor Vehicle Accident Response, Medical Assistance, Investigate Person/Property, Verbal Disputes, Towed, Property Lost, Warrant Arrests, Search Warrant, Property Found, Police Service Incidents, Fire Related Reports, License Violation, Assembly or Gathering Violations, Property Related Damage, Firearm Discovery, License Plate Related Incidents, Service, Harbor Related Incidents, Prisoner Related Incidents, Explosives.
# Part One crimes are the most serious and potentially fatal, while Part Three crimes are merely reports or incident responses.
# Recode UCR_PART into a severity score stored as text:
#   "Part One"   -> "3" (highest severity)
#   "Part Two"   -> "2"
#   "Part Three" -> "1" (lowest severity)
# Any other value is left exactly as it was.
crime_boston2 <- crime_boston %>%
  mutate(
    UCR_PART = as.character(UCR_PART),
    UCR_PART = case_when(
      UCR_PART == "Part One" ~ "3",
      UCR_PART == "Part Two" ~ "2",
      UCR_PART == "Part Three" ~ "1",
      TRUE ~ UCR_PART
    )
  )
# Build crime_count in two steps.
#
# Step 1 - one row per (district letter, severity score):
#   * keep only rows whose UCR_PART was recoded to 1, 2 or 3,
#   * strip the digits from DISTRICT so sub-districts collapse into their
#     letter group, and drop rows with an empty district,
#   * total = severity score * number of incidents, so both the seriousness
#     and the volume of crime contribute to the weight.
crime_count <- crime_boston2 %>%
  filter(UCR_PART %in% c(1, 2, 3)) %>%
  mutate(
    DISTRICT = as.character(stringr::str_replace_all(DISTRICT, "[0-9]*", ""))
  ) %>%
  filter(DISTRICT != "") %>%
  group_by(DISTRICT, UCR_PART) %>%
  summarise(count = n()) %>%
  mutate(
    UCR_PART = as.numeric(UCR_PART),
    count = as.numeric(count),
    total = UCR_PART * count
  )

# Step 2 - collapse to one row per district: the summed weight, plus its
# share of the overall weight (rounded to two decimals), which gives the bar
# chart a more readable y-axis.
crime_count <- crime_count %>%
  group_by(DISTRICT) %>%
  summarise(Weighted_Total = sum(total)) %>%
  mutate(
    Percent_Total = round(Weighted_Total / sum(Weighted_Total), digits = 2)
  )
# Incident-level table: identifier, district letter and coordinates for every
# incident with a severity score of 1, 2 or 3. Digits are stripped from
# DISTRICT so that sub-districts fold into their general letter group; rows
# left with an empty district are dropped.
crime_district <- crime_boston2 %>%
  filter(UCR_PART %in% c(1, 2, 3)) %>%
  mutate(
    DISTRICT = as.character(stringr::str_replace_all(DISTRICT, "[0-9]*", ""))
  ) %>%
  filter(DISTRICT != "") %>%
  select(INCIDENT_NUMBER, DISTRICT, Lat, Long)

# Attach each district's weighted total and share to its incidents.
innerTest <- inner_join(crime_district, crime_count, by = "DISTRICT")
# Bar chart
# One bar per district showing its share of the severity-weighted crime total.
# Each bar is filled by its own share value and labelled with that value just
# below its top edge.
ggplot(crime_count, aes(x = DISTRICT, y = Percent_Total)) +
  geom_col(aes(fill = as.factor(Percent_Total))) +
  scale_fill_brewer() +
  geom_text(
    aes(label = Percent_Total),
    vjust = 1.6,
    color = "black",
    position = position_dodge(0.9),
    size = 3.5
  ) +
  # "none" (lower case) is the documented value for hiding the legend; the
  # capitalised "None" used before is not one of the accepted positions.
  theme(legend.position = "none") +
  labs(title = "Severity+Count of Crimes per District in Boston")
# Map
# Scatter of incident coordinates, coloured by the weighted total of the
# district each incident belongs to.
#
# Built with ggplot() instead of the deprecated qplot(). The earlier version
# drew every point twice - once through qplot(geom = 'point') at the default
# size and once through an extra geom_point(size = 0.5) - so the default-size
# layer hid the intended small points and the missing-value warning was
# printed twice. A single size-0.5 layer is kept here.
ggplot(
  innerTest,
  aes(x = Long, y = Lat, color = as.factor(Weighted_Total))
) +
  geom_point(size = 0.5) +
  xlim(-71.2, -70.95) +
  ylim(42.22, 42.4) +
  scale_color_brewer() +
  theme_bw(base_size = 15) +
  # "none" (lower case) is the documented value for hiding the legend.
  theme(
    legend.position = "none",
    axis.title = element_blank()
  ) +
  labs(title = "Severity+Count of Crimes per district in Boston")
# Rows with missing coordinates or coordinates outside the x/y limits are
# dropped with a warning. The original run reported:
## Warning: Removed 19802 rows containing missing values (geom_point).